External validation, recalibration, and clinical utility of the prognostic model kidney failure risk equation in patients with CKD stages G3-4: a nationwide retrospective cohort analysis in Peru

Main Analysis - Winsorization 1.5%: 7. Clinical Utility

Author

Percy Soto Becerra

1 Setup

rm(list = ls())

# Use pacman to check whether packages are installed, if not load
if (!require("pacman")) install.packages("pacman")
library(pacman)

# Unload all package to begin in a session with only base package

# Install packages
pacman::p_load(
  rio, 
  here, 
  tidyverse, 
  # gtools, 
  knitr, 
  kableExtra, 
  ggsci, 
  patchwork,
  # flextable, 
  furrr,
  parallel,
  mice,
  # survival,
  # prodlim, 
  # cmprsk,
  # riskRegression,
  # pec, 
  # splines, 
  dcurves,
  RColorBrewer
)
source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))

2 Load data

# Import data
imp.datos<- readRDS(here::here("Data", "Tidy", "Main-Winsorize-1_5",  "data_impA.rds")) |> 
  mutate(acr = exp(log_acr), 
         urine_crea = exp(log_urine_crea), 
         urine_album = exp(log_urine_album), 
         acr_cat = case_when(acr < 30 ~ "A1", 
                             acr <= 300 & acr >= 30 ~ "A2", 
                             acr > 300 ~ "A3", 
                             TRUE ~ as.character(NA)), 
         crit_ietsi = case_when((grf_cat == "G4" | 
                                (grf_cat == "G3b" & acr_cat == "A3")) & 
                                  !is.na(acr_cat) & !is.na(grf_cat) ~ 1, 
                                is.na(acr_cat) | is.na(grf_cat) ~ NA, 
                                TRUE ~ 0), 
         crit_nice2014 = case_when((grf_cat == "G4" | 
                                      (grf_cat == "G3b" & acr > 619.47)) & # 619.47 = 70 mg/mmol
                                     !is.na(acr) & !is.na(grf_cat) ~ 1, 
                                   is.na(acr) | is.na(grf_cat) ~ NA, 
                                   TRUE ~ 0))  

# Note: Conversor of units of uACR: https://www.scymed.com/en/smnxps/psdjb222_c.htm      
rm(data_imp)
gc()
            used   (Mb) gc trigger   (Mb)  max used   (Mb)
Ncells   2416714  129.1    3904690  208.6   3904690  208.6
Vcells 134573395 1026.8  217813506 1661.8 210444554 1605.6

3 Clinical Utility

imp.datos %>% 
  count(eventdf)

4 Select a imputed dataset

# Original KFRE
imp.datos <- imp.datos %>% 
    mutate(risk2y = kfre_pr(imp.datos, 2),
         risk5y = kfre_pr(imp.datos, 5), 
         pi = kfre_pi(imp.datos)) %>% 
    filter(.imp != 0) 

# Metod A
df_recal_metA <- import(here("Data", "Tidy", "Main-Winsorize-1_5",  "equations", "df_recal_modA.rds"))

df_recal_metA2y <- df_recal_metA |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metA5y <- df_recal_metA |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

imp.datos <- imp.datos |> 
  left_join(df_recal_metA2y, by = ".imp") |> 
  left_join(df_recal_metA5y, by = ".imp") |> 
  mutate(risk2y_metA = 1 - st0_imp2y ^ exp(fc_coef_imp2y * pi), 
        risk5y_metA = 1 - st0_imp5y ^ exp(fc_coef_imp5y * pi)) |> 
  select(.imp, .id, time, eventd, eventdf, pi, starts_with("risk2y"), 
         starts_with("risk5y"), crit_ietsi, crit_nice2014)

# Metod B
df_recal_metB <- import(here("Data", "Tidy", "Main-Winsorize-1_5",  "equations", "df_recal_modB.rds"))

df_recal_metB2y <- df_recal_metB |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metB5y <- df_recal_metB |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

imp.datos <- imp.datos |> 
  left_join(df_recal_metB2y, by = ".imp") |> 
  left_join(df_recal_metB5y, by = ".imp") |> 
  mutate(risk2y_metB = 1 - st0_imp2y ^ exp(fc_coef_imp2y * pi), 
        risk5y_metB = 1 - st0_imp5y ^ exp(fc_coef_imp5y * pi)) |> 
  select(.imp, .id, time, eventd, eventdf, pi, starts_with("risk2y"), 
         starts_with("risk5y"), crit_ietsi, crit_nice2014)

# Metod C
df_recal_metC <- import(here("Data", "Tidy", "Main-Winsorize-1_5",  "equations", "df_recal_modC.rds"))

df_recal_metC2y <- df_recal_metC |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metC5y <- df_recal_metC |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

imp.datos <- imp.datos |> 
  left_join(df_recal_metC2y, by = ".imp") |> 
  left_join(df_recal_metC5y, by = ".imp") |> 
  mutate(risk2y_metC = 1 - st0_imp2y ^ exp(fc_coef_imp2y * pi), 
        risk5y_metC = 1 - st0_imp5y ^ exp(fc_coef_imp5y * pi)) |> 
  select(.imp, .id, time, eventd, eventdf, pi, starts_with("risk2y"), 
         starts_with("risk5y"), crit_ietsi, crit_nice2014)

# Metod D
df_recal_metD <- import(here("Data", "Tidy", "Main-Winsorize-1_5",  "equations", "df_recal_modD.rds"))

df_recal_metD2y <- df_recal_metD |> 
  filter(year == 2) |> 
  select(-year) |> 
  rename(st0_imp2y = st0_imp, 
         fc_coef_imp2y = fc_coef_imp)

df_recal_metD5y <- df_recal_metD |> 
  filter(year == 5) |> 
  select(-year) |> 
  rename(st0_imp5y = st0_imp, 
         fc_coef_imp5y = fc_coef_imp)

imp.datos <- imp.datos |> 
  left_join(df_recal_metD2y, by = ".imp") |> 
  left_join(df_recal_metD5y, by = ".imp") |> 
  mutate(risk2y_metD = 1 - st0_imp2y ^ exp(fc_coef_imp2y * pi), 
        risk5y_metD = 1 - st0_imp5y ^ exp(fc_coef_imp5y * pi)) |> 
  select(.imp, .id, time, eventd, eventdf, pi, starts_with("risk2y"), 
         starts_with("risk5y"), crit_ietsi, crit_nice2014)
imp.datos_filter <- imp.datos |>  
  filter(.imp > 0)

4.1 5-years

# Configura furrr para usar 10 núcleos
n_cores <- (detectCores() - 2) / 2
plan(multisession, workers = n_cores)

# Define la función para calcular las curvas de beneficio neto
calculate_net_benefit <- function(imp_data) {
  net_curves <- dca(Surv(time, eventdf) ~ risk5y + risk5y_metA + risk5y_metB + 
                      risk5y_metC + risk5y_metD + crit_ietsi + crit_nice2014,  
                    data = imp_data, 
                    time = 5, 
                    thresholds = seq(0.00, 0.06, 0.01), 
                    label = list(risk5y = "KFRE original", 
                                 risk5y_metA = "KFRE recalibrated with method A", 
                                 risk5y_metB = "KFRE recalibrated with method B",
                                 risk5y_metC = "KFRE recalibrated with method C", 
                                 risk5y_metD = "KFRE recalibrated with method D", 
                                 crit_ietsi = "Peruvian National Guidelines", 
                                 crit_nice2014 = "NICE 2014 Guidelines")) |> 
    net_intervention_avoided()
  
  net_curves_df <- as_tibble(net_curves)
  net_curves_df
}

# Filtra los datos por número de imputación
imputations <- unique(imp.datos_filter$.imp)

# Aplica la función a cada conjunto de datos imputado
results <- future_map(imputations, function(imp_num) {
  imp_data <- filter(imp.datos_filter, .imp == imp_num)
  calculate_net_benefit(imp_data)
})

# Combina los resultados en un solo tibble
final_net_benefit5y <- bind_rows(results, .id = "imputation")

# Cierra la sesión de future y libera los recursos utilizados
plan(sequential)
future:::ClusterRegistry("stop")

## Pool de curvas de decision
average_imputations <- function(df) {
  df %>%
    group_by(variable, label, n, threshold) %>%
    summarize(
      pos_rate = mean(pos_rate),
      tp_rate = mean(tp_rate),
      fp_rate = mean(fp_rate),
      harm = mean(harm),
      net_benefit = mean(net_benefit),
      net_intervention_avoided = mean(net_intervention_avoided, na.rm = TRUE), 
      .groups = "drop"
    )
}

pooled_net_benefit5y <- average_imputations(final_net_benefit5y)

## Guardar datos par curva de beneficio
export(final_net_benefit5y , here("Data", "Tidy", "Main-Winsorize-1_5",  "final_net_benefit5y.rds"))
export(pooled_net_benefit5y, here("Data", "Tidy", "Main-Winsorize-1_5",  "pooled_net_benefit5y.rds"))

# Define una paleta de colores seria usando Dark2
color_palette <- brewer.pal(8, "Dark2")
# Filtra los datos y crea el gráfico
p1 <- pooled_net_benefit5y %>%
  filter(!is.na(net_benefit)) %>%
  mutate(label = fct_recode(label, 
                            "Referral All" = "Treat All", 
                            "Referrall None" = "Treat None")) |> 
  ggplot(aes(x = threshold, y = net_benefit, color = label, linetype = label)) +
  geom_line(size = 1, alpha = 0.8) +
  coord_cartesian(ylim = c(-0.01, 0.05)) +
  scale_x_continuous(breaks = seq(0.00, 0.06, 0.01), 
                     labels = c("0%\n(0:100)", "1%\n(1:99)", "2%\n(1:49)", 
                                "3%\n(1:33)", "4%\n(1:25)", 
                                "5%\n(1:19)", "6%\n(1:17)")) +
  labs(x = "Threshold Probability\n(Harm:Benefit ratio)", y = "Net Benefit", color = "", linetype = "", 
       title = "5-years") +
  theme_bw() + 
  scale_fill_npg("nrc") + 
  theme(plot.title = element_text(hjust = 0.5))
p1

4.1.1 Table of benefit curves

pooled_net_benefit5y |> 
  filter(threshold %in% c(seq(0.00, 0.06, by = 0.01))) |> 
  select(label, threshold, net_benefit, net_intervention_avoided) |> 
  pivot_wider(id_cols = threshold, names_from = label, values_from = c(net_benefit)) |> 
  kbl() |> 
  kable_styling()
threshold Treat All Peruvian National Guidelines NICE 2014 Guidelines Treat None KFRE original KFRE recalibrated with method A KFRE recalibrated with method B KFRE recalibrated with method C KFRE recalibrated with method D
0.00 0.0476187 0.0322410 0.0311111 0 0.0476187 0.0476187 0.0476187 0.0476187 0.0476187
0.01 0.0379987 0.0309380 0.0299679 0 0.0390100 0.0390114 0.0392271 0.0389562 0.0393666
0.02 0.0281824 0.0296083 0.0288014 0 0.0341625 0.0341706 0.0343256 0.0339863 0.0346972
0.03 0.0181636 0.0282513 0.0276108 0 0.0307393 0.0307561 0.0313886 0.0302957 0.0316192
0.04 0.0079362 0.0268660 0.0263953 0 0.0277805 0.0277820 0.0286541 0.0274819 0.0288138
0.05 -0.0025066 0.0254515 0.0251543 0 0.0254845 0.0254949 0.0263825 0.0252173 0.0261173
0.06 -0.0131716 0.0240069 0.0238869 0 0.0233093 0.0233732 0.0241375 0.0229960 0.0240224

4.2 2-years

# Configura furrr para usar 10 núcleos
plan(multisession, workers = n_cores)

# Define la función para calcular las curvas de beneficio neto
calculate_net_benefit <- function(imp_data) {
  net_curves <- dca(Surv(time, eventdf) ~ risk2y + risk2y_metA + risk2y_metB + 
                      risk2y_metC + risk2y_metD + crit_ietsi + crit_nice2014,  
                    data = imp_data, 
                    time = 2, 
                    thresholds = seq(0.00, 0.50, 0.1), 
                    label = list(risk2y = "KFRE original", 
                                 risk2y_metA = "KFRE recalibrated with method A", 
                                 risk2y_metB = "KFRE recalibrated with method B",
                                 risk2y_metC = "KFRE recalibrated with method C", 
                                 risk2y_metD = "KFRE recalibrated with method D", 
                                 crit_ietsi = "Peruvian National Guidelines", 
                                 crit_nice2014 = "NICE 2014 Guidelines")) |> 
    net_intervention_avoided()
  
  net_curves_df <- as_tibble(net_curves)
  net_curves_df
}

# Filtra los datos por número de imputación
imputations <- unique(imp.datos_filter$.imp)

# Aplica la función a cada conjunto de datos imputado
results <- future_map(imputations, function(imp_num) {
  imp_data <- filter(imp.datos_filter, .imp == imp_num)
  calculate_net_benefit(imp_data)
})

# Combina los resultados en un solo tibble
final_net_benefit2y <- bind_rows(results, .id = "imputation")

# Cierra la sesión de future y libera los recursos utilizados
plan(sequential)
future:::ClusterRegistry("stop")

## Pool de curvas de decision
pooled_net_benefit2y <- average_imputations(final_net_benefit2y)

## Guardar datos par curva de beneficio
export(final_net_benefit2y , here("Data", "Tidy", "Main-Winsorize-1_5",  "final_net_benefit2y.rds"))
export(pooled_net_benefit2y, here("Data", "Tidy", "Main-Winsorize-1_5",  "pooled_net_benefit2y.rds"))

# Define una paleta de colores seria usando Dark2
color_palette <- brewer.pal(8, "Dark2")
# Filtra los datos y crea el gráfico
p2 <- pooled_net_benefit2y %>%
  filter(!is.na(net_benefit)) %>%  
  mutate(label = fct_recode(label, 
                            "Referral All" = "Treat All", 
                            "Referrall None" = "Treat None")) |> 
  ggplot(aes(x = threshold, y = net_benefit, color = label, linetype = label)) +
  geom_line(size = 1, alpha = 0.8) +
  coord_cartesian(ylim = c(-0.005, 0.03)) +
  scale_x_continuous(labels = c("0%\n(0:100)", "10%\n(1:9)", "20%\n(1:5)", 
                                "30%\n(1:2.3)", "40%\n(1:1.5)", "50%\n(1:1)")) +
  labs(x = "Threshold Probability\n(Harm:Benefit ratio)", y = "Net Benefit", color = "", linetype = "", 
       title = "2-years") +
  theme_bw() + 
  scale_fill_npg("nrc") + 
  theme(plot.title = element_text(hjust = 0.5))

p_final <- p2 + p1 + 
  plot_annotation(tag_levels = "a") +
  plot_layout(guides = 'collect')

ggsave(filename = "net_benefit.jpeg", 
       plot = p_final, 
       device = "jpeg", 
       path = here("Figures", "Main-Winsorize-1_5"), 
       scale = 2, 
       width = 12, 
       height = 6, 
       units = "cm", 
       dpi = 1000)
p2

4.2.1 Table of benefit curves

pooled_net_benefit2y |> 
  filter(threshold %in% c(seq(0.00, 0.50, by = 0.1))) |> 
  select(label, threshold, net_benefit, net_intervention_avoided) |> 
  pivot_wider(id_cols = threshold, names_from = label, values_from = c(net_benefit)) |> 
  kbl() |> 
  kable_styling()
threshold Treat All Peruvian National Guidelines NICE 2014 Guidelines Treat None KFRE original KFRE recalibrated with method A KFRE recalibrated with method B KFRE recalibrated with method C KFRE recalibrated with method D
0.0 0.0273046 0.0204121 0.0199747 0 0.0273046 0.0273046 0.0273046 0.0273046 0.0273046
0.1 -0.0807727 0.0047643 0.0061619 0 0.0044966 0.0064862 0.0066057 0.0064387 0.0064473
0.2 -0.2158693 -0.0147953 -0.0111042 0 0.0015772 0.0012658 0.0015383 0.0012879 0.0015567
0.3 -0.3895649 -0.0399435 -0.0333034 0 0.0000022 -0.0005490 -0.0002245 -0.0004646 -0.0001334
0.4 -0.6211590 -0.0734743 -0.0629023 0 -0.0003106 -0.0022581 -0.0003346 -0.0021480 -0.0003294
0.5 -0.9453908 -0.1204175 -0.1043408 0 -0.0007732 -0.0029423 -0.0004806 -0.0026287 -0.0003822
include_graphics(here("Figures", "Main-Winsorize-1_5",  "net_benefit.jpeg"))

5 Ticket de reproducibilidad

sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0

locale:
 [1] LC_CTYPE=es_ES.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=es_ES.UTF-8    
 [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=es_ES.UTF-8   
 [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Lima
tzcode source: system (glibc)

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] RColorBrewer_1.1-3 dcurves_0.5.0      mice_3.16.0        furrr_0.3.1       
 [5] future_1.34.0      patchwork_1.2.0    ggsci_3.2.0        kableExtra_1.4.0  
 [9] knitr_1.48         lubridate_1.9.3    forcats_1.0.0      stringr_1.5.1     
[13] dplyr_1.1.4        purrr_1.0.2        readr_2.1.5        tidyr_1.3.1       
[17] tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0    here_1.0.1        
[21] rio_1.2.2          pacman_0.5.1      

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.1  viridisLite_0.4.2 farver_2.1.2      R.utils_2.12.3   
 [5] fastmap_1.2.0     digest_0.6.37     rpart_4.1.23      timechange_0.3.0 
 [9] lifecycle_1.0.4   survival_3.7-0    magrittr_2.0.3    compiler_4.4.1   
[13] rlang_1.1.4       tools_4.4.1       utf8_1.2.4        yaml_2.3.10      
[17] labeling_0.4.3    htmlwidgets_1.6.4 xml2_1.3.6        withr_3.0.1      
[21] R.oo_1.26.0       nnet_7.3-19       grid_4.4.1        fansi_1.0.6      
[25] jomo_2.7-6        colorspace_2.1-1  globals_0.16.3    scales_1.3.0     
[29] iterators_1.0.14  MASS_7.3-61       cli_3.6.3         rmarkdown_2.28   
[33] ragg_1.3.2        generics_0.1.3    rstudioapi_0.16.0 tzdb_0.4.0       
[37] minqa_1.2.8       splines_4.4.1     vctrs_0.6.5       boot_1.3-30      
[41] glmnet_4.1-8      Matrix_1.7-0      jsonlite_1.8.8    hms_1.1.3        
[45] mitml_0.4-5       listenv_0.9.1     jpeg_0.1-10       systemfonts_1.1.0
[49] foreach_1.5.2     glue_1.7.0        parallelly_1.38.0 nloptr_2.1.1     
[53] pan_1.9           codetools_0.2-20  stringi_1.8.4     gtable_0.3.5     
[57] shape_1.4.6.1     lme4_1.1-35.5     munsell_0.5.1     pillar_1.9.0     
[61] htmltools_0.5.8.1 R6_2.5.1          textshaping_0.4.0 rprojroot_2.0.4  
[65] evaluate_0.24.0   lattice_0.22-5    highr_0.11        R.methodsS3_1.8.2
[69] backports_1.5.0   broom_1.0.6       Rcpp_1.0.13       svglite_2.1.3    
[73] nlme_3.1-165      xfun_0.47         pkgconfig_2.0.3